<--- %%NOBANNER%% --> bigsort.sas
 BackForward

/*-------------------------------------------------------------------------------\
| SOURCE:   bigsort.sas                                                          |
|                                                                                |
| VERSION:  RDS v3.0                                                             |
|           Part of the SPIKEware Rapid Decision Support SAS Macro Package       |
|           for SAS v8.x and above                                               |
|                                                                                |
| PURPOSE:  This macro breaks a dataset into smaller datasets, sorts those       |
|           smaller datasets, and then merges back together in ordre.  It is     |
|           designed for sorting a very large dataset more efficiently.          |
|                                                                                |
| SYNTAX:   %checkall (indata  = _SAS_dataset_name_,                   |
|                      outdata = _SAS_output_dataset_name_,            |
|                      by      = _SAS_sorted_by_variable_list_,        |
|                      parts   = _number_of_pieces_to_break_into_)     |
\-------------------------------------------------------------------------------*/
%macro bigsort(indata=, outdata=, by=, parts=10) / des='Sorts very large datasets';
%local indata outdata by notes starttime i;
%let notes = %sysfunc(getoption(notes,keyword));
options nonotes;
%let starttime = %sysfunc(datetime());

/*---------------------------------------------------\
| complete simple diagnostic of PARTS macro variable |
\---------------------------------------------------*/
%if &parts < 5 %then %let parts = 5;
%else %if &parts > 25 %then %let parts = 25;

/*-------------------------------------------------------\
| if the data is less than 500,000 records, just sort it |
+--------------------------------------------------------+-------------------------------------+
| NOTE:  Depending on your operating system and environment, you may want to change this value |
\---------------------------------------------------------------------------------------------*/

%if %nobs(&indata) < 500000 %then %do;
   proc sort data=&indata out=&outdata; by &by; run;
%end ;
%else %do;
   /*-------------------------------------\
   | determine size of each chunk of data |
   \-------------------------------------*/
   %let smallobscnt = %scan(%eval(%nobs(&indata)/&parts), 1) ;

   /*----------------------------------------------------\
   | loop through the first N-1 parts, sorting each part |
   \----------------------------------------------------*/
   %do i = 0 %to %eval(&parts - 2);
      proc sort data = &indata (firstobs=%eval(&smallobscnt*&i+1) obs=%eval(&smallobscnt*&i + &smallobscnt))
                out  = sorted%eval(&i+1); by &by ; run ;
   %end;
   /*-------------------\
   | sort the last part |
   \-------------------*/
   proc sort data=&indata(firstobs=%eval(&smallobscnt*(&parts-1)+1)) out=sorted&parts; by &by; run;

   /*-----------------------------\
   | merge the data back together |
   \-----------------------------*/
   data &outdata; set %_prefix(sorted,1 to &parts); by &by; run;

   proc datasets library=macro nolist memtype=(view data); delete sorted1-sorted&parts; run; quit;
%end;
/*----------------------------------\
| prepare SAS session for open code |
\----------------------------------*/
options ¬es;
%put NOTE: There were %nobs(&indata) observations read from the data set %data(&indata).;
%put NOTE: The data set &outdata has %nobs(&outdata) observations and %nvars(&outdata) variables. ;
%timenote (macro=bigsort, starttime=&starttime); %put;
%mend bigsort ;